# Start from an empty global workspace. `all.names = TRUE` also removes
# objects whose names start with a dot. The argument name and the logical
# constant are spelled out in full: `T` is an ordinary variable that can be
# reassigned, and `all` only worked through partial argument matching.
rm(list = ls(all.names = TRUE))
# Switch every locale category to the portable "C" locale.
Sys.setlocale("LC_ALL", "C")
## [1] "C"
pacman::p_load(dplyr, ggplot2, plotly)
load("rdata/Z.rdata")
library(maps)
## Warning: package 'maps' was built under R version 3.5.3
# Polygon outline of Brazil, taken from the "world" map data.
Brazil <- filter(map_data("world"), region == "Brazil")

# Base layer reused by the later plots: Brazil drawn as a gray polygon.
# The data is attached to the polygon layer only, so layers added later
# must supply their own data.
brazilPlot <- ggplot() +
  geom_polygon(
    mapping = aes(x = long, y = lat, group = group),
    data = Brazil,
    fill = "gray"
  )
# Remove outliers: keep only the points that fall inside Brazil's bounding box.
# Extreme points of Brazil, converted to decimal degrees:
#   northernmost:  5 deg 16' 27.8"  N latitude  ->   5.27438888
#   southernmost: 33 deg 45' 04.21" S latitude  -> -33.75116944
#   westernmost:  73 deg 58' 58.19" W longitude -> -73.98283055
#   easternmost:  34 deg 47' 35.33" W longitude -> -34.79314722
# All four bounds are checked in a single pass. The mask goes through which()
# because a logical row index that contains NA makes `[` return an all-NA row,
# so rows with a missing coordinate would otherwise remain in Geo as NA rows
# instead of being dropped.
Geo <- Geo[which(
  Geo$geolocation_lat <= 5.27438888 &
    Geo$geolocation_lat >= -33.75116944 &
    Geo$geolocation_lng >= -73.98283055 &
    Geo$geolocation_lng <= -34.79314722
), ]
# selllocation = Geo %>%
# group_by(geolocation_city) %>%
# summarise(selllat = max(geolocation_lat),
# selllng = max(geolocation_lng))
# library(Imp)
# install.packages("Imp")
# How many distinct zip-code prefixes are present in the geolocation table?
Geo$geolocation_zip_code_prefix %>% n_distinct()
## [1] 19010
# Tabulate rows per zip-code prefix, then tabulate those row counts;
# tail() shows the last (largest) row counts and how often each one occurs.
Geo$geolocation_zip_code_prefix %>% table() %>% table() %>% tail()
## .
## 832 879 907 965 1102 1146
## 1 1 1 1 1 1
# Keep only one point per zip code.
# Collapse the geolocation table to a single row per zip-code prefix.
# NOTE(review): latitude and longitude are maximised independently, so the
# resulting (custlat, custlng) pair is the corner of the prefix's bounding
# box and need not be a point that actually occurs in Geo -- confirm this
# is the intended representative point.
custlocation <- summarise(
  group_by(Geo, geolocation_zip_code_prefix),
  custlat = max(geolocation_lat),
  custlng = max(geolocation_lng)
)
# Merge data: Customer + Order + Geolocation
# custOrd = left_join(Ord, Cust, by="customer_id")
# custOrdGeo = merge(custOrd, Geo, by.x = "customer_zip_code_prefix", by.y = "geolocation_zip_code_prefix", all.x = T)
# Step 1: attach the customer record to every order (all orders are kept).
COG <- left_join(Ord, Cust, by = "customer_id")
# Step 2: attach the per-zip-prefix coordinates; orders whose customer zip
# prefix has no entry in custlocation get NA for custlat / custlng.
COG <- left_join(
  COG,
  custlocation,
  by = c(customer_zip_code_prefix = "geolocation_zip_code_prefix")
)
# See where each order comes from.
# Overlay one small dot per order on the Brazil base map, positioned at the
# customer's zip-prefix coordinates and coloured by the customer's state.
g <- brazilPlot +
  geom_point(
    mapping = aes(x = custlng, y = custlat, color = customer_state),
    data = COG,
    size = 0.2
  )
# Convert the static ggplot into an interactive plotly widget and display it.
ggplotly(g)